#Author Martina Bradic
#This script performs a comparison between baseline and on-trx paires. 
#First we cluster all the samples baseline and on-trx and determine their immune type; Supplemental Figure 17
#we then compare changes in immune proportion,  TE expression and IKZF1 expression between pairs pre and post treatment 
# this script generates Figure 6, and Table 1, as well as all associated statistics


library(data.table)
## data.table 1.15.4 using 1 threads (see ?getDTthreads).  Latest news: r-datatable.com
## **********
## This installation of data.table has not detected OpenMP support. It should still work but in single-threaded mode.
## This is a Mac. Please read https://mac.r-project.org/openmp/. Please engage with Apple and ask them for support. Check r-datatable.com for updates, and our Mac instructions here: https://github.com/Rdatatable/data.table/wiki/Installation. After several years of many reports of installation problems on Mac, it's time to gingerly point out that there have been no similar problems on Windows or Linux.
## **********
library(tidyr)
library("SummarizedExperiment")
## Warning: package 'SummarizedExperiment' was built under R version 4.3.2
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## 
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## 
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## 
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## 
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## 
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.3.2
## 
## Attaching package: 'S4Vectors'
## 
## The following object is masked from 'package:tidyr':
## 
##     expand
## 
## The following objects are masked from 'package:data.table':
## 
##     first, second
## 
## The following object is masked from 'package:utils':
## 
##     findMatches
## 
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## 
## The following object is masked from 'package:data.table':
## 
##     shift
## 
## Loading required package: GenomeInfoDb
## Warning: package 'GenomeInfoDb' was built under R version 4.3.3
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## 
## Attaching package: 'Biobase'
## 
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## 
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:Biobase':
## 
##     combine
## 
## The following objects are masked from 'package:GenomicRanges':
## 
##     intersect, setdiff, union
## 
## The following object is masked from 'package:GenomeInfoDb':
## 
##     intersect
## 
## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## 
## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## 
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## 
## The following object is masked from 'package:matrixStats':
## 
##     count
## 
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.18.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##     genomic data. Bioinformatics 2016.
## 
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(ggplot2)
library(limma)
## 
## Attaching package: 'limma'
## 
## The following object is masked from 'package:BiocGenerics':
## 
##     plotMA
library(edgeR)
## Warning: package 'edgeR' was built under R version 4.3.2
library(survival)
## Warning: package 'survival' was built under R version 4.3.3
library(survminer)
## Loading required package: ggpubr
## 
## Attaching package: 'survminer'
## 
## The following object is masked from 'package:survival':
## 
##     myeloma
library(ggsci)
## Warning: package 'ggsci' was built under R version 4.3.3
library (factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(FactoMineR)
library (factoextra)
library(immunedeconv)
## Loading required package: EPIC
library(GSVA)
## Warning: package 'GSVA' was built under R version 4.3.3
`%nin%` = Negate(`%in%`)


###################################################  GET GENE MATRIX FROM REDISCOVERTE QUANTIFICATION ######################

#Note: These are normalzied CPM log2 values which are obtained by the scaling method in edgeR, 
## These counts still need to be filtered for low expression and non-variable genes, and normalized using voom method
### prior to differential expression and linear model analysis 

Clinical_phenotypes_ontrx<-read.table("Clinical_phenotypes_ontrx")
gene_expression_mtrx_ontrx<-read.table("gene_expression_mtrx_ontrx")
colnames(gene_expression_mtrx_ontrx)<-gsub(".","-",fixed=TRUE,colnames(gene_expression_mtrx_ontrx))

names_ontrx<-data.frame(colnames(gene_expression_mtrx_ontrx))
colnames(names_ontrx)<-"Pathology_Accession_nb"
names_ontrx<-left_join(names_ontrx, Clinical_phenotypes_ontrx)
## Joining with `by = join_by(Pathology_Accession_nb)`
all.equal(names_ontrx$Pathology_Accession_nb,colnames(gene_expression_mtrx_ontrx))
## [1] TRUE
colnames(gene_expression_mtrx_ontrx)<-names_ontrx$CMO_ID
colnames(gene_expression_mtrx_ontrx)<-paste(colnames(gene_expression_mtrx_ontrx), sep="_","Ontrx")

gene_expression_mtrx_baseline<-read.table("gene_expression_mtrx_baseline")
colnames(gene_expression_mtrx_baseline)<-gsub(".","-",fixed=TRUE,colnames(gene_expression_mtrx_baseline))

colnames(gene_expression_mtrx_baseline)<-paste(colnames(gene_expression_mtrx_baseline), sep="_","base")


all.equal(rownames(gene_expression_mtrx_ontrx),rownames(gene_expression_mtrx_baseline))
## [1] TRUE
gene_expression_mtrx<-cbind(gene_expression_mtrx_baseline,gene_expression_mtrx_ontrx)

#get clinical data
Clinical_phenotypes<-read.table("Clinical_phenotypes_combined.txt", sep="\t", header=TRUE)


#order clinical file based on gene expresion 
Ordered_pheno<-data.frame(colnames(gene_expression_mtrx))
colnames(Ordered_pheno)<-"CMO_ID"
Ordered_pheno<-left_join(Ordered_pheno,Clinical_phenotypes)
## Joining with `by = join_by(CMO_ID)`
Ordered_pheno$`Abbreviation for Figures`<-Ordered_pheno$Abbreviation.for.Figures

#perform filtering
keep.exprs <- filterByExpr(gene_expression_mtrx, group=Ordered_pheno$`Abbreviation for Figures`)
gene_expression_mtrx_filtered <- gene_expression_mtrx[which(keep.exprs==TRUE),]
dim(gene_expression_mtrx_filtered)
## [1] 29432   113
#    after filtering 
#29432    113

### normalize it with voom
normalized_counts<-voom(gene_expression_mtrx_filtered)
normalized_counts<-normalized_counts$E


###contring with mcp_counter mehtod     
res_mcp_counter = deconvolute(normalized_counts, "mcp_counter")
## 
## >>> Running mcp_counter
res_mcp_counter_table_2<-as.data.frame(res_mcp_counter %>% gather(sample, fraction, -cell_type))
res_mcp_counter_table_2_matrix<-spread(res_mcp_counter_table_2, cell_type,fraction)


res_mcp_counter_table<-dplyr::left_join(res_mcp_counter_table_2_matrix,Ordered_pheno,by=c("sample"="CMO_ID"))
rownames(res_mcp_counter_table)<-res_mcp_counter_table$sample
res_mcp_counter_table<-res_mcp_counter_table[,-1]
res_mcp_counter_table_to_plot<-res_mcp_counter_table[,1:11]

mat2<-data.frame(res_mcp_counter_table_to_plot)

####Scale the values 
mat_immuno2<-t(scale(mat2))


all.equal(colnames(mat_immuno2),Ordered_pheno$CMO_ID)
## [1] "113 string mismatches"
#need to reorder
correct_order_for_batch<-data.frame(colnames(mat_immuno2))
colnames(correct_order_for_batch)<-"CMO_ID"
correct_order_for_batch<-left_join(correct_order_for_batch,Ordered_pheno)
## Joining with `by = join_by(CMO_ID)`
all.equal(colnames(mat_immuno2),correct_order_for_batch$CMO_ID)
## [1] TRUE
mat_immuno2_no_batch<-removeBatchEffect(mat_immuno2, correct_order_for_batch$batch,correct_order_for_batch$`Abbreviation for Figures`)


##################################################### DETERMINE CLUSTERS FROM MCP counter using FactoMineR analysis ###############
#set seed for reproducibilty
set.seed(2021)

#We will apply agglomerative HC using Ward's method/Euclidean distance across a range of k from 2-12; we will have FactoMineR suggest the optimal k which is does based on within-cluster sum of squares (i.e. inertia);  the initial partition suggested by HC is followed by a k-means procedure to consolidate cluster membership
res.hc <- HCPC(as.data.frame(t(mat_immuno2_no_batch)), 
               nb.clust = -1,      #Allow FactoMineR to suggest optimal partition (minimize within cluster sum of sq)
               consol=TRUE,        #Perform k-means consolidation after initial HC partition
               iter.max = 10,      #Iterations for the k-means consolidation
               min = 2,            #Min k
               max = 12,           #Max k
               metric="euclidean", #Distance metric
               method="ward",      #Ward's method
               graph = FALSE)

#Lets evaluate between group sum of squares (inertia) before and after k-means consolidation - it should go up as we are maximizing separation
res.hc$call$bw.before.consol
## [1] 2.592913
res.hc$call$bw.after.consol
## [1] 3.120352
#Now that we've performed the clustering, lets visualize the HC dendrogram using factoextra, will use the lancet color palette from ggsci package 
#Note that this reflects the HC partition prior to the k-means consolidation


#Lets export the cluster assignments so that we can compare features in more detail
#write.csv(res.hc[["data.clust"]],"[specify name of output file]")

cluster_assignments<-res.hc[["data.clust"]]


# If you want to plot dendrogram 
#fviz_dend(res.hc, 
#          cex = 0.7,                     # Label size
#          palette = "jco",               # Color palette see ?ggpubr::ggpar
#          rect = TRUE, rect_fill = TRUE, # Add rectangle around groups
#          rect_border = "jco",           # Rectangle color
#          labels_track_height = 0.8      # Augment the room for labels
#)

# Individuals facor map, This is Supplemental Figure 1
fviz_cluster(res.hc, geom = "point", main = "Factor") +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"),
        axis.title.x = element_text(size=10),axis.title.y = element_text(size=10), 
        axis.text.x = element_text( size=10),axis.text.y = element_text(size=10)) +
  scale_color_manual(values = c("1" = "red", "2" = "#0000FF"))

#Supplemental table 2: Display quantitative variables that describe the most each cluster
res.hc$desc.var$quanti
## $`1`
##                           v.test Mean in category Overall mean sd in category
## Endothelial.cell       -3.363652       -0.8939000  -0.65229926      0.7444703
## Neutrophil             -5.978128       -0.3572731   0.10100968      0.6432925
## T.cell.CD8.            -6.786158       -0.8515328  -0.28076004      0.7931594
## B.cell                 -6.983992       -0.6059057  -0.01852841      0.6364411
## NK.cell                -7.001257       -0.8815788  -0.31519989      0.6145970
## cytotoxicity.score     -7.298817       -1.0814077  -0.47907261      0.6525948
## Myeloid.dendritic.cell -7.308607       -0.8801371  -0.28172804      0.6780153
## Monocyte               -7.419175       -0.5542371  -0.01419596      0.5881823
## Macrophage.Monocyte    -7.419175       -0.5542371  -0.01419596      0.5881823
## T.cell                 -7.962981       -1.1257962  -0.49999310      0.4904141
##                        Overall sd      p.value
## Endothelial.cell        0.7806007 7.691855e-04
## Neutrophil              0.8331246 2.257163e-09
## T.cell.CD8.             0.9140731 1.151591e-11
## B.cell                  0.9140186 2.869076e-12
## NK.cell                 0.8791698 2.536765e-12
## cytotoxicity.score      0.8968655 2.903093e-13
## Myeloid.dendritic.cell  0.8898263 2.699260e-13
## Monocyte                0.7910664 1.178525e-13
## Macrophage.Monocyte     0.7910664 1.178525e-13
## T.cell                  0.8540900 1.679431e-15
## 
## $`2`
##                          v.test Mean in category Overall mean sd in category
## T.cell                 7.962981        0.1599447  -0.49999310      0.6297582
## Monocyte               7.419175        0.5553020  -0.01419596      0.5375842
## Macrophage.Monocyte    7.419175        0.5553020  -0.01419596      0.5375842
## Myeloid.dendritic.cell 7.308607        0.3493216  -0.28172804      0.6050962
## cytotoxicity.score     7.298817        0.1561171  -0.47907261      0.6460940
## NK.cell                7.001257        0.2820725  -0.31519989      0.7033419
## B.cell                 6.983992        0.6008876  -0.01852841      0.7360507
## T.cell.CD8.            6.786158        0.3211457  -0.28076004      0.5893853
## Neutrophil             5.978128        0.5842897   0.10100968      0.7311751
## Endothelial.cell       3.363652       -0.3975203  -0.65229926      0.7355115
##                        Overall sd      p.value
## T.cell                  0.8540900 1.679431e-15
## Monocyte                0.7910664 1.178525e-13
## Macrophage.Monocyte     0.7910664 1.178525e-13
## Myeloid.dendritic.cell  0.8898263 2.699260e-13
## cytotoxicity.score      0.8968655 2.903093e-13
## NK.cell                 0.8791698 2.536765e-12
## B.cell                  0.9140186 2.869076e-12
## T.cell.CD8.             0.9140731 1.151591e-11
## Neutrophil              0.8331246 2.257163e-09
## Endothelial.cell        0.7806007 7.691855e-04
#show principal dimensions that are the most associated with clusters, type this:
res.hc$desc.axes$quanti
## $`1`
##          v.test Mean in category  Overall mean sd in category Overall sd
## Dim.1 -9.017132         -1.71767 -1.328338e-15       1.062387   2.070204
##            p.value
## Dim.1 1.930773e-19
## 
## $`2`
##         v.test Mean in category  Overall mean sd in category Overall sd
## Dim.1 9.017132         1.811362 -1.328338e-15       1.105745   2.070204
##            p.value
## Dim.1 1.930773e-19
#Finally, representative individuals of each cluster can be extracted as follow:
res.hc$desc.ind$para
## Cluster: 1
## C-CPRNJ2_base C-1LM4EJ_base C-WD4RUC_base C-2M22RC_base C-6PL9CN_base 
##     0.7655650     0.8278142     0.8862958     0.9670174     1.0820552 
## ------------------------------------------------------------ 
## Cluster: 2
##  C-AL8WAK_base C-CPRNJ2_Ontrx C-MA4H5N_Ontrx C-2M22RC_Ontrx C-46JU8E_Ontrx 
##      0.7930102      0.8378519      0.8467655      0.9675638      1.0185951
Cluster_1_individuals_contrib<-unlist(res.hc$desc.ind$para[1])
Cluster_1_individuals_contrib<-names(Cluster_1_individuals_contrib)
Cluster_1_individuals_contrib<- gsub("1.","", Cluster_1_individuals_contrib)
Cluster_2_individuals_contrib<-unlist(res.hc$desc.ind$para[2])
Cluster_2_individuals_contrib<-names(Cluster_2_individuals_contrib)
Cluster_2_individuals_contrib<- gsub("2.","", Cluster_2_individuals_contrib)

#match orders of samples
all.equal(rownames(cluster_assignments), colnames(mat_immuno2))
## [1] TRUE
all.equal(rownames(cluster_assignments), colnames(t(mat2)))
## [1] TRUE
all.equal(rownames(cluster_assignments), colnames(mat_immuno2_no_batch))
## [1] TRUE
#call cluster names
cluster_assignments$Immune_subtypes<-ifelse(cluster_assignments$clust %in% 1, "Immune cold","Immune hot")
cluster_assignments$CMO_ID<-rownames(cluster_assignments)


#Assign cluster names into phenotyo phile
Ordered_pheno<-left_join(Ordered_pheno,cluster_assignments[,c("CMO_ID","Immune_subtypes")])
## Joining with `by = join_by(CMO_ID)`
############################################################################################################################

#             GET Trasposable element counts from REDISCOVERTE, normalize 
############################################################################################################################


##################################.   GET Transportable element counts from REDISCOVER TE pipeline and prepare for analysis  ##########################################
### get TEs expression only for LTR, LINE, SINE elements

#GEt only Intergenic baseline
INTERGENIC_TE_normalized_expression_1052_repeats_baseline<-read.table("INTERGENIC_TE_normalized_expression_1052_repeats_baseline")
colnames(INTERGENIC_TE_normalized_expression_1052_repeats_baseline)<-gsub(".","-",fixed=TRUE,colnames(INTERGENIC_TE_normalized_expression_1052_repeats_baseline))


#read intergenic TEs ontrx

INTERGENIC_TE_normalized_expression_1052_repeats_ontrx<-read.table("INTERGENIC_TE_normalized_expression_1052_repeats_ontrx")
colnames(INTERGENIC_TE_normalized_expression_1052_repeats_ontrx)<-gsub(".","-",fixed=TRUE,colnames(INTERGENIC_TE_normalized_expression_1052_repeats_ontrx))

dim(INTERGENIC_TE_normalized_expression_1052_repeats_baseline)
## [1] 1052   67
dim(INTERGENIC_TE_normalized_expression_1052_repeats_ontrx)
## [1] 1052   46
#all 1052 are here 
sharedTEs<-intersect(rownames(INTERGENIC_TE_normalized_expression_1052_repeats_baseline),rownames(INTERGENIC_TE_normalized_expression_1052_repeats_ontrx))


#MERGE THEM ALL TOGETHER 

#renames sample names for baseline and ontrx first
colnames(INTERGENIC_TE_normalized_expression_1052_repeats_baseline)<-paste(colnames(INTERGENIC_TE_normalized_expression_1052_repeats_baseline), sep="_","base")


names_ontrx_TE<-data.frame(colnames(INTERGENIC_TE_normalized_expression_1052_repeats_ontrx))
colnames(names_ontrx_TE)<-"Pathology_Accession_nb"
names_ontrx_TE<-left_join(names_ontrx_TE, Clinical_phenotypes_ontrx)
## Joining with `by = join_by(Pathology_Accession_nb)`
all.equal(names_ontrx_TE$Pathology_Accession_nb,colnames(INTERGENIC_TE_normalized_expression_1052_repeats_ontrx))
## [1] TRUE
colnames(INTERGENIC_TE_normalized_expression_1052_repeats_ontrx)<-names_ontrx_TE$CMO_ID
colnames(INTERGENIC_TE_normalized_expression_1052_repeats_ontrx)<-paste(colnames(INTERGENIC_TE_normalized_expression_1052_repeats_ontrx), sep="_","Ontrx")



INTERGENIC_TE_normalized_expression_1052_repeats<-cbind(INTERGENIC_TE_normalized_expression_1052_repeats_baseline,INTERGENIC_TE_normalized_expression_1052_repeats_ontrx)



#Match TEs with their familes 
TE_names_order<-data.frame(rownames(INTERGENIC_TE_normalized_expression_1052_repeats))
colnames(TE_names_order)<-"repName"
TE_family_annotation<-read.table("TE_family_annotation")
TE_names_order<-left_join(TE_names_order, TE_family_annotation)
## Joining with `by = join_by(repName)`
TE_names_order$repClass<-gsub("LTR?", "LTR", fixed=TRUE, TE_names_order$repClass)
TE_names_order$repClass<-gsub("SINE?", "SINE", fixed=TRUE, TE_names_order$repClass)


#### Filter gene TE matrix filterByExpr
#TEs are rows, samples are columns, use expression per group as a minimum 

#check the order of samples
all.equal(colnames(INTERGENIC_TE_normalized_expression_1052_repeats), Ordered_pheno$CMO_ID)
## [1] TRUE
keep.exprs_TE <- filterByExpr(INTERGENIC_TE_normalized_expression_1052_repeats, group=Ordered_pheno$`Abbreviation for Figures`)
INTERGENIC_TE_normalized_expression_1052_repeats_filtered <- INTERGENIC_TE_normalized_expression_1052_repeats[which(keep.exprs_TE==TRUE),]
dim(INTERGENIC_TE_normalized_expression_1052_repeats_filtered)
## [1] 995 113
all.equal(colnames(INTERGENIC_TE_normalized_expression_1052_repeats_filtered), Ordered_pheno$CMO_ID)
## [1] TRUE
###There are 844 TEs that are left here to be analysed

### normalize it with voom
normalized_intergenic_TE<-voom(INTERGENIC_TE_normalized_expression_1052_repeats_filtered)
normalized_TE_counts<-normalized_intergenic_TE$E

###Continue analysis with TE normalized data
INTERGENIC_TE_normalized_expression_1052_repeats<-data.frame(t(normalized_TE_counts))

INTERGENIC_TE_normalized_expression_1052_repeats_Z_sore_transformed<-t(INTERGENIC_TE_normalized_expression_1052_repeats %>% mutate_at(colnames(INTERGENIC_TE_normalized_expression_1052_repeats), scale))
colnames(INTERGENIC_TE_normalized_expression_1052_repeats_Z_sore_transformed)<-rownames(INTERGENIC_TE_normalized_expression_1052_repeats)

#remove those with NAs, these are 3 TEs acros all the samples only after we transfor with Z-score
INTERGENIC_TE_normalized_expression_1052_repeats_Z_sore_transformed<-INTERGENIC_TE_normalized_expression_1052_repeats_Z_sore_transformed[!rowSums(is.na(INTERGENIC_TE_normalized_expression_1052_repeats_Z_sore_transformed)) > 0,]

INTERGENIC_TE_normalized_expression_1052_repeats<-t(INTERGENIC_TE_normalized_expression_1052_repeats)

#check if Prototypes order match to TE matrix order
all.equal(Ordered_pheno$CMO_ID,colnames(INTERGENIC_TE_normalized_expression_1052_repeats))
## [1] TRUE
##################### PLOT INTERGENIC NORMALIZED EXPRESSION OVER TWO CLSUTERS

INTERGENIC_TE_normalized_expression_1052_repeats_no_batch<-removeBatchEffect(INTERGENIC_TE_normalized_expression_1052_repeats_Z_sore_transformed, Ordered_pheno$batch,Ordered_pheno$`Abbreviation for Figures`)

#TE_intergenic_for_heatmap<-t(scale(INTERGENIC_TE_normalized_expression_1052_repeats))

TE_intergenic_for_heatmap<-INTERGENIC_TE_normalized_expression_1052_repeats_no_batch



#For_heatmap_TE_annotation<-readRDS("~/juno/work/ccs/badicm/Project_NACEV/RNAseq/REdiscoverTE/REdiscoverTE/ANALYSIS_FINAL/Rollup_analysis/ALL_INCLUDING_DICSON_projects/RESULTS/RE_intergenic_2_counts_normalized.RDS")
TE_intergenic_for_heatmap_row_anno<-data.frame(rownames(TE_intergenic_for_heatmap))
colnames(TE_intergenic_for_heatmap_row_anno)<-"repName"

# Fix some TE family names, replace "." with "-" 
TE_intergenic_for_heatmap_row_anno$repName<-gsub("CR1.","CR1-",TE_intergenic_for_heatmap_row_anno$repName,fixed = TRUE)
TE_intergenic_for_heatmap_row_anno$repName<-gsub(".int","-int",TE_intergenic_for_heatmap_row_anno$repName,fixed = TRUE)
TE_intergenic_for_heatmap_row_anno$repName<-gsub("ERV3.","ERV3-",TE_intergenic_for_heatmap_row_anno$repName,fixed = TRUE)
TE_intergenic_for_heatmap_row_anno$repName<-gsub("ERVL.","ERVL-",TE_intergenic_for_heatmap_row_anno$repName,fixed = TRUE)
TE_intergenic_for_heatmap_row_anno$repName<-gsub("hAT.","hAT-",TE_intergenic_for_heatmap_row_anno$repName,fixed = TRUE)
TE_intergenic_for_heatmap_row_anno$repName<-gsub("HERV.","HERV-",TE_intergenic_for_heatmap_row_anno$repName,fixed = TRUE)
TE_intergenic_for_heatmap_row_anno$repName<-gsub("HUERS.","HUERS-",TE_intergenic_for_heatmap_row_anno$repName,fixed = TRUE)
TE_intergenic_for_heatmap_row_anno$repName<-gsub("L1PA15.","L1PA15-",TE_intergenic_for_heatmap_row_anno$repName,fixed = TRUE)
TE_intergenic_for_heatmap_row_anno$repName<-gsub("L2.","L2-",TE_intergenic_for_heatmap_row_anno$repName,fixed = TRUE)
TE_intergenic_for_heatmap_row_anno$repName<-gsub("MamGypsy2.","MamGypsy2-",TE_intergenic_for_heatmap_row_anno$repName,fixed = TRUE)
TE_intergenic_for_heatmap_row_anno$repName<-gsub("ORSL.","ORSL-",TE_intergenic_for_heatmap_row_anno$repName,fixed = TRUE)


TE_intergenic_for_heatmap_row_anno<-left_join(TE_intergenic_for_heatmap_row_anno,TE_family_annotation)
## Joining with `by = join_by(repName)`
TE_intergenic_for_heatmap_row_anno$repClass<-gsub("LTR?","LTR",fixed = TRUE,TE_intergenic_for_heatmap_row_anno$repClass)
TE_intergenic_for_heatmap_row_anno$repClass<-gsub("SINE?","SINE",fixed = TRUE,TE_intergenic_for_heatmap_row_anno$repClass)

#recheck if it was replaced
TE_intergenic_for_heatmap_row_anno[is.na(TE_intergenic_for_heatmap_row_anno$repClas),]
## [1] repName   repFamily repClass 
## <0 rows> (or 0-length row.names)
###Create annotations and plot Supplemental Figure 5

#Annotation for columns
ann_for_2_types <- data.frame(Ordered_pheno[,c("Response","Abbreviation for Figures", "Sample_Timepoint")]) 
rownames(ann_for_2_types)<-Ordered_pheno$"CMO_ID"
colnames(ann_for_2_types) <- c("Response","Subtype","Sample_Timepoint") 



colours_responders <- list(
  'Response' = c("PD"="#543005","SD"="#A6611A" , "CR/PR"= "#F6E8C3"),
  'Subtype'=c ("ANGS"="#DD67C0" ,"CHS"="#A848E0","DDLS"= "#DEABCA","LMS"= "#F4A460","OS"= "#8881D2" ,"SARCNOS"="#8DB6D1","UPS"= "#82D992","EHE"="#FFFF00","LPS"="#0000FF","MFS"="#B22222","Other"="#FF0000","SBRC"="#85E0D6","ASPS"="grey"),
  
  'Sample_Timepoint'=c("Baseline"="blue",'Ontrx'="red")
)


colAnn2 <- HeatmapAnnotation(df = ann_for_2_types,
                             which = "col",
                             col = colours_responders,
                             annotation_width = unit(c(1, 4), "cm"),
                             gap = unit(1,"mm"))


#Annotation for rows

colours_rows <- list(
  'repClass' = c("DNA"="orange","SINE"="darkpurple" , "LINE"= "darkblue","LTR"="darkgreen")
)

rowAnn <- HeatmapAnnotation(df = TE_intergenic_for_heatmap_row_anno$repClass,
                            which = "row",
                            col = colours_rows,
                            annotation_width = unit(c(1, 4), "cm"),
                            gap = unit(1,"mm"))




################################.#######################################   CALCALATE TE scores  ########################################


TE_matrix<-INTERGENIC_TE_normalized_expression_1052_repeats
TE_matrix<-t(TE_matrix)
#Check if the ORder pheno is the same sample order with TE_matrix
all.equal(rownames(TE_matrix) , Ordered_pheno$CMO_ID)
## [1] TRUE
INTERGENIC_TE_normalized_expression_1052_repeats[rownames(INTERGENIC_TE_normalized_expression_1052_repeats) %in% c("MER57F","Tigger17a","MER61F","MER45A","LTR104_Mam","HERVL74.int"),]
##             C-VKXN13_base C-P61F3Y_base C-F5U9XF_base C-X96M0A_base
## HERVL74.int      7.364396      7.530934     10.514843      7.754191
## LTR104_Mam       8.556495      8.267900      9.194143      8.796011
## MER45A           8.623783      8.663129     10.534372      9.437557
## MER57F           6.187899      7.377311     13.637270      7.494547
## MER61F           6.264861      6.259129      9.965084      7.494547
## Tigger17a        8.420139      8.538202      9.635039      8.952130
##             C-82E9M5_base C-5JJPJR_base C-KAJ2A6_base C-AL8WAK_base
## HERVL74.int      8.317869      9.760623      7.918643      8.584018
## LTR104_Mam       9.288722      9.921088      8.678623      9.140050
## MER45A           9.618202      9.087098      8.525193      9.403597
## MER57F           8.334263      7.831308      6.212756      7.426477
## MER61F           7.785521      6.690648      6.063378      6.836461
## Tigger17a        9.100277      9.301451      8.581227      8.884991
##             C-532E5K_base C-C3AJRC_base C-VK8L7J_base C-LEC8C7_base
## HERVL74.int      9.188399      8.985157      6.378836      7.086029
## LTR104_Mam       8.919695      8.289513      8.140225      8.433300
## MER45A           9.553063      8.700301      9.124572      8.895990
## MER57F           8.540805      6.317527      6.749204      5.736444
## MER61F           7.240866      5.153140      9.492959      6.139800
## Tigger17a        8.589222      9.033251      8.583526      8.447584
##             C-CPRNJ2_base C-CC1UXH_base C-MDEDT7_base C-6HEVUK_base
## HERVL74.int      7.245994      7.523186      8.884989      7.587443
## LTR104_Mam       8.373749      8.627137      8.769215      8.620074
## MER45A           8.945063      8.894024      9.073436      8.873087
## MER57F           6.191546      7.057522      6.334145     11.924772
## MER61F           7.413939      5.328170      7.646735      7.295839
## Tigger17a        9.268088      8.411159      8.435231      8.647468
##             C-MA4H5N_base C-68NL7N_base C-TDR4W9_base C-70AEFR_base
## HERVL74.int      9.848327      8.605475      9.023267      8.074858
## LTR104_Mam       9.095952      8.970931      8.262938      7.926759
## MER45A           9.948246      8.909091     10.597237      8.209159
## MER57F          10.940178      5.717950      9.732150      6.176737
## MER61F           6.734764      7.168611      6.555118      5.380271
## Tigger17a        9.036655      9.141976      8.243174      8.566971
##             C-M81LDF_base C-ELUPUM_base C-WT3724_base C-XPH66L_base
## HERVL74.int      9.658450      8.566307      8.373617      7.670697
## LTR104_Mam       8.221933      8.341434      9.443551      8.051278
## MER45A           9.700473      9.434000      8.373617      7.775667
## MER57F           4.800469      8.885258      6.179606      5.628010
## MER61F           6.385431      7.323946      6.264495      5.112702
## Tigger17a        8.958898      9.146303      8.054075      8.008866
##             C-NHKURN_base C-U859RF_base C-7E9EXV_base C-LLWLEV_base
## HERVL74.int      6.950878      7.913118      9.447005      8.721269
## LTR104_Mam       8.159692      8.469511      8.599008      9.048192
## MER45A           8.980143      8.798348      9.794559     10.128991
## MER57F           5.663997     12.427797     12.179698      9.529814
## MER61F           3.342069      5.591190      7.798617      7.973141
## Tigger17a        7.734386      8.663599      8.689206      8.951976
##             C-28PL34_base C-81D6WE_base C-YHLTV1_base C-TN74NL_base
## HERVL74.int      9.244488      8.262996      8.154943      9.849528
## LTR104_Mam       8.567118      8.886341      8.712072      8.958815
## MER45A           7.996122      9.833672      8.931785      9.821568
## MER57F           7.004437      8.705291      5.904717     11.706267
## MER61F           4.809421      7.913308      6.968847      6.378977
## Tigger17a        9.272300      8.598044      8.648225      8.863307
##             C-8EEM67_base C-L7M7WP_base C-44CDAN_base C-CTK666_base
## HERVL74.int      8.343546     10.272545      8.608068      7.753411
## LTR104_Mam       8.776132      9.198710      8.701782      7.438255
## MER45A          10.001772      8.824802      8.646287      7.800825
## MER57F          10.843671      7.532135      5.630785      7.609375
## MER61F           8.891034      6.067779      6.143787      5.258931
## Tigger17a        9.072418      8.923768      8.486932      8.090332
##             C-R5FT5U_base C-2EE23U_base C-E36838_base C-MVMX43_base
## HERVL74.int      8.928043      7.688649      8.691849      9.850345
## LTR104_Mam       8.498875      8.711251      8.646406      8.861720
## MER45A           9.161242      8.703558     10.405136     10.621913
## MER57F           5.597654      6.163188      8.523831      7.117181
## MER61F           7.238111      6.328247      9.708528      9.244293
## Tigger17a        9.057085      7.913209      9.029350      9.323210
##             C-556AL5_base C-L7C8JC_base C-6RJP70_base C-NF6VH4_base
## HERVL74.int      8.641790      8.229167      8.251716      8.537329
## LTR104_Mam       8.746759      8.637863      8.370036      8.258514
## MER45A           9.699594      9.092086      8.010708      9.099986
## MER57F           5.416370      7.115096      6.935940      6.865281
## MER61F           7.804640      7.525560      5.904913      5.994014
## Tigger17a        9.345715      9.362062      8.458167      8.707924
##             C-WJ49UR_base C-20FDCU_base C-4DV3D5_base C-FN268L_base
## HERVL74.int      9.076749      8.286500      8.306275      9.312354
## LTR104_Mam       8.889730      8.317197      8.427412      8.617824
## MER45A          10.255901      9.023466      9.479600      9.513052
## MER57F           8.277047      7.724024      5.852099      6.416190
## MER61F           9.059101      7.937166      6.162440      6.990426
## Tigger17a        9.059101      8.541757      8.556971      8.603817
##             C-K9U229_base C-H7RF5A_base C-20LTFP_base C-WD4RUC_base
## HERVL74.int      9.043031      9.090380      9.026126      7.062021
## LTR104_Mam       8.778654      8.833524      9.056028      8.523248
## MER45A           9.277832      8.609898      9.583124      9.555191
## MER57F           5.811399      7.529974      7.924431      5.730177
## MER61F           6.909548      6.511596      7.638522      5.264513
## Tigger17a        9.015938      8.303009      8.724822      9.234140
##             C-001528_base C-46JU8E_base C-WKND2T_base C-4PEVTX_base
## HERVL74.int      8.972761     10.690109      9.295882      9.227733
## LTR104_Mam       9.019810      9.262909      8.853432      8.938226
## MER45A           8.821644     10.108349      8.772157      9.003077
## MER57F           5.865233      9.155552      6.649046      7.965603
## MER61F           7.027504      8.510534      6.472890      7.965603
## Tigger17a        8.839238      9.308174      9.054726      9.162986
##             C-1LM4EJ_base C-Y6TH4J_base C-JNLDDC_base C-1PCN42_base
## HERVL74.int      8.024477     10.400838      8.960997      9.908911
## LTR104_Mam       8.747158      8.494640      8.769268      9.325686
## MER45A           9.083655      9.737643      9.427987      9.642396
## MER57F           6.192031     10.297540      5.539533      9.770365
## MER61F           5.313337      7.750738      6.928576      8.330885
## Tigger17a        8.441290      9.702128      8.158443      8.819274
##             C-WNVW3H_base C-70K997_base C-X867R9_base C-J0AW9Y_base
## HERVL74.int      9.115802      8.838609      9.765819      8.289175
## LTR104_Mam       8.712918      8.068091      8.517284      9.074235
## MER45A           9.973530      8.846242      9.716406      8.996495
## MER57F           7.602274      7.175006      7.616650      7.050388
## MER61F           6.779152      5.952614      6.680783      5.775766
## Tigger17a        9.130374      8.898574      8.789904      7.787354
##             C-6PL9CN_base C-2M22RC_base C-2FPL8C_base C-C3AJRC_Ontrx
## HERVL74.int      8.330435      7.183097      8.519935       8.928316
## LTR104_Mam       9.151210      7.414423      8.354198       8.165478
## MER45A           9.056573     10.281587      9.514332       9.088639
## MER57F           7.202730     12.351279     11.234926       9.069573
## MER61F           5.676430      6.746998      5.209147       5.807178
## Tigger17a        8.247179      7.868989      8.427571       8.878298
##             C-532E5K_Ontrx C-AL8WAK_Ontrx C-LEC8C7_Ontrx C-CPRNJ2_Ontrx
## HERVL74.int       9.959216       9.251875       6.856488       7.971688
## LTR104_Mam        9.248464       9.273775       8.359863       8.400855
## MER45A           10.135112       9.461999       8.974252       9.054465
## MER57F           10.400236       9.088486       5.811164       7.803182
## MER61F            7.537549       7.116025       5.614128       7.582916
## Tigger17a         8.962047       9.012370       8.974252       9.197322
##             C-F5U9XF_Ontrx C-WT3724_Ontrx C-XPH66L_Ontrx C-NHKURN_Ontrx
## HERVL74.int       8.280177       8.419967       7.170519       7.213268
## LTR104_Mam        9.336162       9.220188       8.858575       8.092586
## MER45A            9.211085       8.804826       8.336878       8.725167
## MER57F           11.829121       7.275093       5.926594       5.827614
## MER61F            8.056054       5.983327       4.848591       5.396980
## Tigger17a         9.316054       8.401470       8.473082       8.073090
##             C-LLWLEV_Ontrx C-7E9EXV_Ontrx C-MA4H5N_Ontrx C-6HEVUK_Ontrx
## HERVL74.int       9.994478       8.735906       8.694976       8.305679
## LTR104_Mam        9.162703       8.796874       8.561298       8.787387
## MER45A           10.237837       9.831284       9.326008      10.742484
## MER57F           13.041496       8.983287       8.103616      10.468365
## MER61F            8.989371       6.826418       6.919191       5.759190
## Tigger17a         9.442277       8.693781       9.358613       8.406080
##             C-28PL34_Ontrx C-TDR4W9_Ontrx C-TN74NL_Ontrx C-70AEFR_Ontrx
## HERVL74.int       9.428621       8.956672      10.263260       8.399700
## LTR104_Mam        8.416166       8.216127       8.697281       8.268455
## MER45A            8.465354      10.663389       9.743871      10.001293
## MER57F            6.912725       7.599455      11.800740      10.762146
## MER61F            4.888429       6.377063       4.247248       6.077771
## Tigger17a         9.031387       8.045081       8.939739       8.565437
##             C-U859RF_Ontrx C-81D6WE_Ontrx C-YHLTV1_Ontrx C-8EEM67_Ontrx
## HERVL74.int       8.781301       8.833011       9.106663       7.467569
## LTR104_Mam        8.563496       9.079706       9.142620       8.888812
## MER45A            9.681437      10.364948       9.237116       8.937515
## MER57F           13.741983      10.940229       6.290177       6.472280
## MER61F            6.696134       9.054505       5.927607       6.069020
## Tigger17a         9.152016       9.002744       8.746691       8.425505
##             C-M81LDF_Ontrx C-ELUPUM_Ontrx C-44CDAN_Ontrx C-L7M7WP_Ontrx
## HERVL74.int       7.633925       8.696502       8.501446       8.902762
## LTR104_Mam        9.062768       8.569868       8.772216       8.146528
## MER45A            8.649272      10.016003       8.847373       8.604284
## MER57F            6.969109      10.832351       4.684753       5.893113
## MER61F            5.311997       6.755880       6.953242       7.228990
## Tigger17a         9.012436       9.301530       8.344262       8.600439
##             C-R5FT5U_Ontrx C-556AL5_Ontrx C-WJ49UR_Ontrx C-L7C8JC_Ontrx
## HERVL74.int       9.261193       8.812480       8.592005       8.912503
## LTR104_Mam        8.820118       8.620999       8.719922       8.721981
## MER45A            8.982593       9.479301       8.095067       9.472461
## MER57F            6.336601       7.275612       7.135199       7.927300
## MER61F            7.690238       8.356102       4.523525       8.566831
## Tigger17a         8.912630       9.539631       8.319384       9.823025
##             C-WKND2T_Ontrx C-NF6VH4_Ontrx C-H7RF5A_Ontrx C-4DV3D5_Ontrx
## HERVL74.int       8.923838       7.921987       7.419163       9.340892
## LTR104_Mam        8.663549       8.454753       8.829795       8.851931
## MER45A            8.932969       8.382084       9.592992       9.765822
## MER57F            6.690719       6.277215       9.823162      10.399256
## MER61F            7.843799       7.014180       6.309538       7.254531
## Tigger17a         8.923838       8.316743       8.829795       8.638797
##             C-MVMX43_Ontrx C-FN268L_Ontrx C-20LTFP_Ontrx C-001528_Ontrx
## HERVL74.int       9.093408       9.214025       8.923118       8.815503
## LTR104_Mam        7.965067       8.381557       8.766528       8.984322
## MER45A            9.142929       9.593273       8.987607       8.733626
## MER57F            5.683497       5.192896       7.846946       5.723056
## MER61F            6.317369       6.833353       7.811757       5.095025
## Tigger17a         8.504996       8.866984       9.220100       8.669016
##             C-46JU8E_Ontrx C-6RJP70_Ontrx C-WD4RUC_Ontrx C-1LM4EJ_Ontrx
## HERVL74.int       9.208241      10.234411       8.109089       8.737623
## LTR104_Mam        9.454092       8.728116       9.522810       8.760987
## MER45A           10.894665       8.560880       9.364204       9.223050
## MER57F           10.572124       8.065749       4.917948       7.766769
## MER61F            9.399900       5.324992       7.200882       6.010438
## Tigger17a         9.176357       8.371685       9.093206       8.597843
##             C-20FDCU_Ontrx C-WNVW3H_Ontrx C-J0AW9Y_Ontrx C-6PL9CN_Ontrx
## HERVL74.int       9.122361       9.242671       8.109465       8.736709
## LTR104_Mam        8.678491       8.623945       9.063395       9.466488
## MER45A           10.316241       9.079788       9.254444       8.585702
## MER57F            8.782202       6.935281       6.682800       8.005705
## MER61F            8.847420       7.298069       6.724343       6.359989
## Tigger17a         8.678491       8.623945       8.280306       8.952687
##             C-2M22RC_Ontrx
## HERVL74.int       8.264716
## LTR104_Mam        8.199127
## MER45A           10.380193
## MER57F            9.784090
## MER61F            6.860325
## Tigger17a         8.285932
#calculate TE score for significant Te features 
TE_features<-c("Tigger17a","MER61F","MER45A","LTR104_Mam","HERVL74.int")

#geneSets<-list()
#geneSets$TE_features<-as.character(TE_features)
#geneSets<-lapply(geneSets, function(z){ z[!is.na(z) & z != ""]})

TE_genes_matrix<-as.matrix(TE_matrix[,colnames(TE_matrix) %in% TE_features])
TE_genes_matrix_df<-data.frame(TE_genes_matrix)
TE_genes_matrix_df$CMO_ID<-rownames(TE_genes_matrix_df)

TE_genes_matrix_df<-left_join(TE_genes_matrix_df, Ordered_pheno)
## Joining with `by = join_by(CMO_ID)`
TE_features<-data.frame(TE_features)

#calculate TE scores
geneSets<-as.list(TE_features)
geneSets$TE_features<-as.character(geneSets$TE_features)
geneSets<-lapply(geneSets, function(z){ z[!is.na(z) & z != ""]})

TE_genes_matrix<-as.matrix(TE_matrix[,colnames(TE_matrix) %in% as.character(TE_features$TE_features)])

gsva_for_TE__genes <-gsva(t(TE_genes_matrix),geneSets,method="zscore", kcdf="Gaussian",verbose=FALSE)
## Warning: Calling gsva(expr=., gset.idx.list=., method=., ...) is deprecated; use
## a method-specific parameter object (see '?gsva').
#check for same order
all.equal(colnames(gsva_for_TE__genes),Ordered_pheno$CMO_ID)
## [1] TRUE
TE_genes_matrix_df<- data.frame(t(gsva_for_TE__genes), Ordered_pheno)



##############################################     Function to plot regression.  ############################################## 
ggplotRegression <- function (fit) {
  
  require(ggplot2)
  
  ggplot(fit$model, aes_string(x = names(fit$model)[2], y = names(fit$model)[1])) + 
    geom_point() +
    stat_smooth(method = "lm", col = "red") +
    labs(title = paste("Adj R2 = ",signif(summary(fit)$adj.r.squared, 5),
                       "Intercept =",signif(fit$coef[[1]],5 ),
                       " Slope =",signif(fit$coef[[2]], 5),
                       " P =",signif(summary(fit)$coef[2,4], 5)))
}


################################################.  COMPARISON BETWEEN PAIRS for immune cells, TEs, TLS and IKZF1 #########################################################
#load function to run this analysis

source("Analyze_individual_groups_function.R")


###Run only one comparison at the the time, and continue to "RUN PAIRS COMPARISON" section and run till the end

##COMPARISON 1
#get pairs that go from cold to hot
Cold_to_hot_switch<-fread("Cold_to_hot_switch.txt",sep="\t", header=TRUE)
get_individual_pairs_comparison_calculations(Cold_to_hot_switch)
## Joining with `by = join_by(CMO_ID)`
## [[1]]

## 
## [[2]]
##                                  p_values
## B.cell                       5.439675e-04
## Cancer.associated.fibroblast 3.197840e-01
## cytotoxicity.score           2.034926e-04
## Endothelial.cell             1.822876e-01
## Macrophage.Monocyte          9.273695e-04
## Monocyte                     9.273695e-04
## Myeloid.dendritic.cell       1.564007e-02
## Neutrophil                   6.199749e-03
## NK.cell                      1.382360e-04
## T.cell                       2.074388e-06
## T.cell.CD8.                  6.321987e-05
## 
## [[3]]

## 
## [[4]]
## 
##  Paired t-test
## 
## data:  IKZF1_selection[IKZF1_selection$Sample_Timepoint %in% "Baseline", ]$IKZF1 and IKZF1_selection[IKZF1_selection$Sample_Timepoint %in% "Ontrx", ]$IKZF1
## t = -8.249, df = 9, p-value = 1.731e-05
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -1.790046 -1.019560
## sample estimates:
## mean difference 
##       -1.404803 
## 
## 
## [[5]]
## 
##  Paired t-test
## 
## data:  TE_genes_matrix_df_selection[TE_genes_matrix_df_selection$Sample_Timepoint %in% "Baseline", ]$TE_features and TE_genes_matrix_df_selection[TE_genes_matrix_df_selection$Sample_Timepoint %in% "Ontrx", ]$TE_features
## t = -4.3051, df = 9, p-value = 0.001976
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -1.9246235 -0.5987162
## sample estimates:
## mean difference 
##        -1.26167
#######################

##COMPARISON 2
#get pairs that go from hot to cold

Hot_to_cold_switch<-fread("Hot_to_cold_switch.txt",sep="\t", header=TRUE)
get_individual_pairs_comparison_calculations(Hot_to_cold_switch)
## Joining with `by = join_by(CMO_ID)`
## [[1]]

## 
## [[2]]
##                                 p_values
## B.cell                       0.032370699
## Cancer.associated.fibroblast 0.702396095
## cytotoxicity.score           0.114541456
## Endothelial.cell             0.130527610
## Macrophage.Monocyte          0.006439886
## Monocyte                     0.006439886
## Myeloid.dendritic.cell       0.006430051
## Neutrophil                   0.010477877
## NK.cell                      0.113927138
## T.cell                       0.015542421
## T.cell.CD8.                  0.370287787
## 
## [[3]]

## 
## [[4]]
## 
##  Paired t-test
## 
## data:  IKZF1_selection[IKZF1_selection$Sample_Timepoint %in% "Baseline", ]$IKZF1 and IKZF1_selection[IKZF1_selection$Sample_Timepoint %in% "Ontrx", ]$IKZF1
## t = 3.5089, df = 6, p-value = 0.01269
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.3914439 2.1953183
## sample estimates:
## mean difference 
##        1.293381 
## 
## 
## [[5]]
## 
##  Paired t-test
## 
## data:  TE_genes_matrix_df_selection[TE_genes_matrix_df_selection$Sample_Timepoint %in% "Baseline", ]$TE_features and TE_genes_matrix_df_selection[TE_genes_matrix_df_selection$Sample_Timepoint %in% "Ontrx", ]$TE_features
## t = 2.3192, df = 6, p-value = 0.05952
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.09608835  3.58526666
## sample estimates:
## mean difference 
##        1.744589
#######################


##COMPARISON 3
#get pairs that go from hot to cold
NO_switch_samples<-fread("NO_switch_samples.txt",sep="\t", header=TRUE)
NO_switch_samples_cold<-NO_switch_samples[NO_switch_samples$`After treatment` %in% "Immune cold",]
get_individual_pairs_comparison_calculations(NO_switch_samples_cold)
## Joining with `by = join_by(CMO_ID)`
## [[1]]

## 
## [[2]]
##                                 p_values
## B.cell                       0.167346206
## Cancer.associated.fibroblast 0.031611882
## cytotoxicity.score           0.004164780
## Endothelial.cell             0.228541727
## Macrophage.Monocyte          0.854317437
## Monocyte                     0.854317437
## Myeloid.dendritic.cell       0.621754784
## Neutrophil                   0.836711124
## NK.cell                      0.026767226
## T.cell                       0.006421621
## T.cell.CD8.                  0.005220749
## 
## [[3]]

## 
## [[4]]
## 
##  Paired t-test
## 
## data:  IKZF1_selection[IKZF1_selection$Sample_Timepoint %in% "Baseline", ]$IKZF1 and IKZF1_selection[IKZF1_selection$Sample_Timepoint %in% "Ontrx", ]$IKZF1
## t = -1.2829, df = 14, p-value = 0.2204
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -1.0121270  0.2545154
## sample estimates:
## mean difference 
##      -0.3788058 
## 
## 
## [[5]]
## 
##  Paired t-test
## 
## data:  TE_genes_matrix_df_selection[TE_genes_matrix_df_selection$Sample_Timepoint %in% "Baseline", ]$TE_features and TE_genes_matrix_df_selection[TE_genes_matrix_df_selection$Sample_Timepoint %in% "Ontrx", ]$TE_features
## t = -0.9825, df = 14, p-value = 0.3425
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -1.3481609  0.5010579
## sample estimates:
## mean difference 
##      -0.4235515
#######################


##COMPARISON 4
NO_switch_samples_hot<-NO_switch_samples[NO_switch_samples$`After treatment` %in% "Immune hot",]
get_individual_pairs_comparison_calculations(NO_switch_samples_hot)
## Joining with `by = join_by(CMO_ID)`
## [[1]]

## 
## [[2]]
##                                p_values
## B.cell                       0.93027100
## Cancer.associated.fibroblast 0.73652654
## cytotoxicity.score           0.32324836
## Endothelial.cell             0.75028046
## Macrophage.Monocyte          0.27660093
## Monocyte                     0.27660093
## Myeloid.dendritic.cell       0.72994378
## Neutrophil                   0.52900694
## NK.cell                      0.79685303
## T.cell                       0.23772948
## T.cell.CD8.                  0.02111259
## 
## [[3]]

## 
## [[4]]
## 
##  Paired t-test
## 
## data:  IKZF1_selection[IKZF1_selection$Sample_Timepoint %in% "Baseline", ]$IKZF1 and IKZF1_selection[IKZF1_selection$Sample_Timepoint %in% "Ontrx", ]$IKZF1
## t = -1.3059, df = 13, p-value = 0.2142
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -1.2857205  0.3169492
## sample estimates:
## mean difference 
##      -0.4843857 
## 
## 
## [[5]]
## 
##  Paired t-test
## 
## data:  TE_genes_matrix_df_selection[TE_genes_matrix_df_selection$Sample_Timepoint %in% "Baseline", ]$TE_features and TE_genes_matrix_df_selection[TE_genes_matrix_df_selection$Sample_Timepoint %in% "Ontrx", ]$TE_features
## t = -0.25955, df = 13, p-value = 0.7993
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -1.0761521  0.8453033
## sample estimates:
## mean difference 
##      -0.1154244
################################################.  COMPARISON BETWEEN PAIRS for 24 immune pathways #########################################################
source("Analyze_individual_groups_for_24_pathways.R")

Gene_lists_for_ddGSEA<-fread("Gene_lists_for_ddGSEA.txt",header=TRUE)

C_GAS_gene_list_KEGG<-fread("C_GAS_gene_list_KEGG")
C_GAS_gene_list_KEGG<-C_GAS_gene_list_KEGG[-c(1),]
C_GAS_gene_list_KEGG<-data.frame(C_GAS_gene_list_KEGG)
colnames(C_GAS_gene_list_KEGG)<-"genes"



Gene_lists_for_ddGSEA<-Gene_lists_for_ddGSEA %>% 
  mutate(Genes=strsplit(Genes, ",")) %>% 
  unnest(Genes)

Gene_list<-split.data.frame(Gene_lists_for_ddGSEA[-1],Gene_lists_for_ddGSEA$`Gene Signature`)
geneSets<-lapply(Gene_list,"[[","Genes")

#add c-gas
geneSets$C_GAS_gene<-C_GAS_gene_list_KEGG$genes

##NOW calculate ssGSES for each sample using normalized counts 
gsva_24_gene_lists<-gsva(as.matrix(normalized_counts),geneSets,method="gsva",kcdf="Gaussian" ,verbose=FALSE)
## Warning: Calling gsva(expr=., gset.idx.list=., method=., ...) is deprecated; use
## a method-specific parameter object (see '?gsva').
## Warning in .gsva(expr, mapped.gset.idx.list, method, kcdf, rnaseq, abs.ranking,
## : Some gene sets have size one. Consider setting 'min.sz > 1'.
all.equal(colnames(gsva_24_gene_lists),Ordered_pheno$CMO_ID)
## [1] TRUE
gsva_24_gene_lists_df<-data.frame(t(gsva_24_gene_lists))
gsva_24_gene_lists_df$CMO_ID<-rownames(gsva_24_gene_lists_df)


get_individual_pairs_comparison_calculations_24_pathways(Cold_to_hot_switch)
## Joining with `by = join_by(CMO_ID)`
## [[1]]
##                                        p_values
## Angiogenesis                       0.2787094709
## Antigen.processing.machinery       0.1257218073
## CD8.T.effector                     0.0029980270
## Cell.cycle                         0.8730234361
## Cell.cycle.regulators              0.0582443271
## DNA.damage.repair                  0.6081471399
## DNA.replication                    0.8473638822
## DNA.replication.dependent.histones 0.8014711836
## EMT                                0.6553340698
## Fanconi.anemia                     0.7350825508
## FGFR3.related.genees               0.8935230004
## Homologous.recombination           0.9322775679
## IL1beta.Response                   0.8613585932
## Immune.checkpoint                  0.1558772903
## Mismatch.repair                    0.3425272455
## NFkB.response                      0.0114579655
## NHEJ                               0.7521802003
## Nucleotide.excision.repair         0.6682228026
## P53.signalling.pathway             0.4273972431
## Pan.F.TBRS                         0.3208205237
## TNFalpha.Response                  0.1953675489
## Type.I.IFN.Response                0.4343042993
## Type.II.IFN.Response               0.0004479311
## WNT.target                         0.8240490397
## C_GAS_gene                         0.0085568124
## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

get_individual_pairs_comparison_calculations_24_pathways(Hot_to_cold_switch)
## Joining with `by = join_by(CMO_ID)`
## [[1]]
##                                      p_values
## Angiogenesis                       0.58359440
## Antigen.processing.machinery       0.17329909
## CD8.T.effector                     0.44427407
## Cell.cycle                         0.29453898
## Cell.cycle.regulators              0.58839115
## DNA.damage.repair                  0.25848764
## DNA.replication                    0.52312171
## DNA.replication.dependent.histones 0.26997071
## EMT                                0.06792738
## Fanconi.anemia                     0.58211218
## FGFR3.related.genees               0.70112885
## Homologous.recombination           0.47624561
## IL1beta.Response                   0.17564642
## Immune.checkpoint                  0.35974875
## Mismatch.repair                    0.23585416
## NFkB.response                      0.46604582
## NHEJ                               0.85870214
## Nucleotide.excision.repair         0.86471630
## P53.signalling.pathway             0.24932214
## Pan.F.TBRS                         0.69794817
## TNFalpha.Response                  0.22191441
## Type.I.IFN.Response                0.34362391
## Type.II.IFN.Response               0.31796466
## WNT.target                         0.52749729
## C_GAS_gene                         0.21775491
## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

get_individual_pairs_comparison_calculations_24_pathways(NO_switch_samples_cold)
## Joining with `by = join_by(CMO_ID)`
## [[1]]
##                                      p_values
## Angiogenesis                       0.83112116
## Antigen.processing.machinery       0.59720144
## CD8.T.effector                     0.11815859
## Cell.cycle                         0.78678211
## Cell.cycle.regulators              0.96355426
## DNA.damage.repair                  0.70617025
## DNA.replication                    0.69916442
## DNA.replication.dependent.histones 0.89053054
## EMT                                0.58617211
## Fanconi.anemia                     0.73847954
## FGFR3.related.genees               0.89407446
## Homologous.recombination           0.87668266
## IL1beta.Response                   0.75004841
## Immune.checkpoint                  0.06723543
## Mismatch.repair                    0.93444966
## NFkB.response                      0.48053525
## NHEJ                               0.54077695
## Nucleotide.excision.repair         0.90088477
## P53.signalling.pathway             0.97605296
## Pan.F.TBRS                         0.87135097
## TNFalpha.Response                  0.88834949
## Type.I.IFN.Response                0.71075439
## Type.II.IFN.Response               0.05727897
## WNT.target                         0.78709913
## C_GAS_gene                         0.87069385
## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

get_individual_pairs_comparison_calculations_24_pathways(NO_switch_samples_hot)
## Joining with `by = join_by(CMO_ID)`
## [[1]]
##                                      p_values
## Angiogenesis                       0.63171267
## Antigen.processing.machinery       0.31091901
## CD8.T.effector                     0.02426114
## Cell.cycle                         0.35234336
## Cell.cycle.regulators              0.51290785
## DNA.damage.repair                  0.64660984
## DNA.replication                    0.37184510
## DNA.replication.dependent.histones 0.23184674
## EMT                                0.21094083
## Fanconi.anemia                     0.62550335
## FGFR3.related.genees               0.30859168
## Homologous.recombination           0.47838641
## IL1beta.Response                   0.91878861
## Immune.checkpoint                  0.79619146
## Mismatch.repair                    0.25251872
## NFkB.response                      0.55756185
## NHEJ                               0.37410148
## Nucleotide.excision.repair         0.74787174
## P53.signalling.pathway             0.47291749
## Pan.F.TBRS                         0.84605335
## TNFalpha.Response                  0.42486164
## Type.I.IFN.Response                0.62653510
## Type.II.IFN.Response               0.07651948
## WNT.target                         0.32044491
## C_GAS_gene                         0.53823990
## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

#rmarkdown::render("Script_baseline_ontrx.R")